Sea Level Monitor analysis

The Sea Level Monitor methodology is described in detail in Deltares (2023). Principles are:

This document is executed with the following parameters:

data.frame(
  "name" = names(unlist(params)),
  "value" = unlist(params),
  row.names = NULL
  ) %>%
  knitr::kable(caption = "Values of document parameters")
Values of document parameters
name value
monitoryear 2024
startyear 1945
wind_or_surge_type GTSM
station1 Delfzijl
station2 Harlingen
station3 Den Helder
station4 IJmuiden
station5 Hoek van Holland
station6 Vlissingen
station7 Netherlands
station8 Netherlands (without Delfzijl)
modeltype1 linear
modeltype2 broken_linear
modeltype3 broken_squared

Get data from PSMSL

Annual average sea level data for the Dutch main stations is downloaded from the Permanent Service for Mean Sea Level site and combined with the Global Tide and Surge Model (GTSM) annual average surge values. In case PSMSL data is not available for the most recent year (2024-1)

# Get data from PSMSL data service
rlr_df <- read_yearly_psmsl_csv(mainstations_df$psmsl_id, filepath = "../../") 

In this analysis, measurements over the period 1945 to 2023 are considered.

if(params$monitoryear-1 == max(rlr_df$year)){
  cat("Mean annual sea level downloaded from PSMSL are availabale up to ", max(rlr_df$year), ", the time series is up to date. ")
} else {
  cat("Mean annual sea level downloaded from PSMSL are only available up to ", max(rlr_df$year), " and thus incomplete for the current analysis. In order to do a preliminary analysis, measurements from Rijkswatersataat Data Distribution Layer will be used for missing year(s). ")
}

Mean annual sea level downloaded from PSMSL are availabale up to 2023 , the time series is up to date.

# Check if PSMSL is up to date for analysis year

if(max(rlr_df$year) == params$monitoryear-1){
  refreshed_df <- rlr_df
  print("PSMSL time series is up-to-date and is used for analysis")
} else {
  print("The PSMSL time series is not complete. An attempt is made to complete the data using RWS DDL.")
  if(max(rlr_df$year) == params$monitoryear-2){ # robuuster maken. nu alleen check voor een ontbrekend jaar.
    # Check if ddl data from required year exist
    required_file <- file.path("../../data/rijkswaterstaat/ddl/annual_means/", paste0(params$monitoryear-1, ".csv"))
    
    if(file.exists(required_file)){
      ddl_datayear <- read_csv2(required_file)
      
    } else{
      cat("DDL data for year", params$monitoryear-1, "is not available", sep = " ")
    }
  }
}
FALSE [1] "PSMSL time series is up-to-date and is used for analysis"
# Get GTSM data from local file
gtsm <- read_yearly_gtsm(filename = "../../data/deltares/gtsm/gtsm_surge_annual_mean_main_stations.csv") |>
  mutate(year = year(ymd(t)))
# convert rlr tot nap2005
    refreshed_df <- rlr_df |> 
      mutate(
        height = rlr_height_mm - as.numeric(`nap-rlr`),
      ) |> 
      rename(station = name)

# check if psmsl data need to be completed with ddl data
    try(
      if(
        exists("ddl_datayear") & 
        !unique(ddl_datayear$year) %in% unique(refreshed_df$year)
      ){
        refreshed_df <- refreshed_df |> bind_rows(ddl_datayear)
      },
      silent = T
    )

    refreshed_df <- refreshed_df |>
      left_join(gtsm, by = c(station = "name", year = "year")) |>
      mutate(
        surge_anomaly = case_when(
          year >= 1950 ~ (1000 * surge - mean(1000 * surge, na.rm = T)), # meters to millimeters
          year < 1950 ~ 0
        )
      ) |>
      select(
        year,
        height,
        station,
        surge_anomaly
      ) %>%
      bind_rows(
        . |>
          group_by(year) |>
          summarise(
            height = mean(height, na.rm = T),
            surge_anomaly = mean(surge_anomaly, na.rm = T)
          ) |>
          mutate(
            station = "Netherlands"
          )
      ) %>%
      bind_rows(
        . |>
          filter(station %in% c("Vlissingen", "Hoek van Holland", "Den Helder", "Harlingen", "IJmuiden")) |>
          group_by(year) |>
          summarise(
            height = mean(height, na.rm = T),
            surge_anomaly = mean(surge_anomaly, na.rm = T)
          ) |>
          mutate(
            station = "Netherlands (without Delfzijl)"
          )
      ) |>
      addBreakPoints() %T>% 
      write_csv2("../../data/deltares/results/dutch-sea-level-monitor-export-stations-latest.csv") %>%
      filter(year >= params$startyear)

Compare with previous years analysis data

# previous year did not include gtsm for year 1950.
# Therefore comparison of surge is done for years > 1950

# range(df$surge_anomaly)
# range(refreshed_df$surge_anomaly, na.rm = T)

refreshed_df_filter <- refreshed_df %>% filter(year > 1950 & year < 2023)

ggplot() +
  geom_point(
    data = previous_df, 
    aes(x = year, y = height),
    shape = "+"
  ) +
  geom_point(
    data = refreshed_df, 
    aes(x = year, y = height), 
    color = "blue", 
    shape = 21, 
    fill = "transparent",
    size = 1
  ) +
  facet_wrap(c("station"), ncol = 3)
Comparison of measured sea level with previous year.

Comparison of measured sea level with previous year.

# previous year did not include gtsm for year 1950.
# Therefore comparison of surge is done for years > 1950

# range(df$surge_anomaly)
# range(refreshed_df$surge_anomaly, na.rm = T)

refreshed_df_filter <- refreshed_df %>% filter(year > 1950 & year < 2023)

ggplot() +
  geom_point(
    data = previous_df, 
    aes(x = year, y = surge_anomaly, color = as.character(params$monitoryear-1))
    ) +
  geom_point(
    data = refreshed_df, 
    aes(x = year, y = surge_anomaly, color = as.character(params$monitoryear)), 
    shape = 21, 
    fill = "transparent",
    size = 1
    ) +
  facet_wrap(c("station"), ncol = 3) +
  coord_cartesian(xlim = c(1950, params$monitoryear)) +
  labs(color = "monitor year")
Comparison of GTSM surge anomaly with previous year.

Comparison of GTSM surge anomaly with previous year.

The difference between the gtsm surge anomalies between the years is caused by the addition of two additional years, 1950 and 2023. The mean surge for some stations has changed due to this addition, causing the surge anomaly to move up or down. This has no consequences for the determination of the sea level trend, but it has effect on the corrected sea level.

Locations of the main stations

This document analyses the sea level trend of the main stations in the Netherlands using different models. Based on geographical coverage and available time series length six stations are considered to be “main tide gauge stations”.

Additionally, to calculate the average sea level and sea level trend along the Dutch Coast, the main station sea level is averaged for each year (virtual station “Netherlands”). Because the station “Delfzijl” has a considerable gap in vertical adjustment for subsidence, we also consider a variant in which Delfzijl is omitted from the main analysis selection (“Netherlands (without Delfzijl)”).

map_stations(df = refreshed_df, mainstations_df = mainstations_df, mainstations_locs = mainstations_locs)

Hoofdgetijstations in Nederland. Er is aangegeven welke stations zijn meegenomen in dit rekendocument.

Sea level measurements

In this section we look at sea level measurements. The global collection of tide gauge records at the PSMSL was used to access the data. There are two types of datasets the “Revised Local Reference” and “Metric”. For the Netherlands the difference is that the “Revised Local Reference” undoes the corrections from the NAP correction in 2005, to get a consistent dataset. Here we transform the RLR back to NAP (without undoing the correction).

The rlrnap computes the rlr back to latest NAP (ignoring the undoing of the NAP correction) the alpha paramater is the dominant wind direction for the stations, based on de Ronde 2013. Id’s are the station ids in the PSMSL dataset. They may change from year to year as the PSMSL 0 point is arbitary. You can lookup the relevant parameters in the schematic diagram like this LRL diagram for station Vlissingen

knitr::kable(mainstations_df[,c('name', 'psmsl_id', 'msl-rlr', 'msl-nap', 'nap-rlr')], caption = "PSMSL inforamtion on the six mains tide gauge stations in the Netherlands.")
PSMSL inforamtion on the six mains tide gauge stations in the Netherlands.
name psmsl_id msl-rlr msl-nap nap-rlr
Vlissingen 20 6976 46 6930
Hoek van Holland 22 6987 114 6873
Den Helder 23 6962 16 6946
Delfzijl 24 6953 130 6823
Harlingen 25 7024 110 6914
IJmuiden 32 7014 64 6950

Sea level measurements for the six main stations (yearly average) are shown in figure @ref(fig:zeespiegelmetingen).

p <- refreshed_df %>%
  dplyr::filter(!grepl("Netherlands", station)) %>%
ggplot(aes(year, height)) +
  geom_point(alpha = 1, aes(color = station), shape = 21, fill = "white", size = 1) +
  geom_line(alpha = 0.5, aes(color = station), linewidth = 0.5) +
  xlab("jaar") + ylab("gemeten zeespiegel in mm") +
  theme_light() +
  theme(legend.position = "bottom")

ggplotly(p) %>% layout(legend = list(x = 0.05, y = 0.95))

Jaarlijks gemiddelde zeespiegel voor de zes hoofdstations langs de Nederlandse kust.

# p
p <- refreshed_df %>%
  dplyr::filter(grepl("Netherlands", station)) %>%
ggplot(aes(year, height)) +
  geom_point(alpha = 1, aes(color = station), shape = 21, fill = "white", size = 1) +
  geom_line(alpha = 0.5, aes(color = station), linewidth = 0.75) +
  xlab("jaar") + ylab("gemeten zeespiegel in mm") +
  theme_light() +
  theme(legend.position = "bottom")

ggplotly(p) %>% layout(legend = list(x = 0.05, y = 0.95))

Jaarlijks gemiddelde zeespiegel voor gemiddelde van stations langs de Nederlandse kust.

# p

Sea level high years

refreshed_df %>%
  filter(year >= 1900) %>%
  dplyr::filter(station == "Netherlands (without Delfzijl)") %>%
  dplyr::arrange(-height) %>%
  dplyr::select(year, station, height_mm = height) %>%
  # dplyr::group_by(station) %>%
  dplyr::slice(c(1:5)) %>%
  knitr::kable(caption = "Overview of the five highest yearly average water levels for the combined station Netherlands (without Delfzijl) in mm during the considered period. ")
Overview of the five highest yearly average water levels for the combined station Netherlands (without Delfzijl) in mm during the considered period.
year station height_mm
2023 Netherlands (without Delfzijl) 152.8
2020 Netherlands (without Delfzijl) 96.6
2022 Netherlands (without Delfzijl) 94.6
2017 Netherlands (without Delfzijl) 94.2
2019 Netherlands (without Delfzijl) 92.0

Storm surge

The expected storm surge per year is determined using output of the Global Tide and Surge Model (GTSM) (ref). This calculates the surge given the bathymetry and climatic conditions. Model results are available from 1950 onwards (figure @ref(fig:gtsm-surge)). The model is run each successive year to calculate the annual average wind and air pressure for use in the Sea Level Monitor. The calculated variation in surge (surge anomaly) is subtracted from the measured sea level before the sea level rise is calculated. For the years before 1950, no runs are available and sea level is corrected for average surge only. This correction reduces the variation due to differences in surge per year and allows a more precise estimate of the long-term trend to be made.

p <- refreshed_df %>%
  # filter(!grepl("Netherlands", station)) %>%
  # filter(station == "IJmuiden") %>%
ggplot(aes(year, surge_anomaly)) +
  geom_point(alpha = 1, aes(color = station), shape = 21, fill = "white", size = 1) +
  geom_line(alpha = 0.5, aes(color = station), linewidth = 0.75) +
  xlab("jaar") + ylab("windopzet in mm") +
  theme_light() +
  theme(legend.position = "bottom") +
  coord_cartesian(xlim = c(1945, params$monitoryear))

# ggplotly(p) %>% layout(legend = list(x = 0.05, y = 0.95))
p
Modelled surge anomaly (deviation of storm surge from long year avearge). The yearly averages surge is calculated for 1950 - now. For earlier years, an average surge is assumed.

Modelled surge anomaly (deviation of storm surge from long year avearge). The yearly averages surge is calculated for 1950 - now. For earlier years, an average surge is assumed.

Trend analysis

The sea level trend is calculated using generalized linear regression. the following models are tested (Deltares 2018 and Deltares 2023):

  • linear model
  • broken linear model (with breakpoint at 1993)
  • broken quadratic model (breakpoint at 1960)

The trend is calculated for all stations individually, for the mean of all stations (“Netherlands”), and for means of all stations minus station Delfzijl (“Netherlands without Delfzijl”).

In the regression equation, nodal tide is one of the components and is estimated as a sinusoid curve with a period of 18.6 years.

byStation <- refreshed_df %>%
  dplyr::group_by(station) %>%
  tidyr::nest() %>%
  dplyr::ungroup()
selectedmodel <- params$modeltype

models <- byStation %>%
  expand_grid(modeltype = selectedmodel) %>%

  mutate(modelfunctionname = paste(modeltype, "model", sep = "_")) %>%
  # add functions for model calculation
  mutate(modelfunctions = map(modelfunctionname, get)) %>%
  # add models based on data and functions
  mutate(model = pmap(
    list(
      data,
      modelfunctions
    ),
    \(.d, .f) .f(.d)
  )) %>%
  mutate(
    glance = map(model, broom::glance),
    rsq    = glance %>% map_dbl("r.squared"),
    adj.rsq = glance %>% map_dbl("adj.r.squared"),
    AIC    = glance %>% map_dbl("AIC"),
    tidy   = map(model, broom::tidy),
    augment = map(model, broom::augment),
    equation = map(model, function(x) equatiomatic::extract_eq(x, ital_vars = TRUE))
  )
eq <- models %>% 
  distinct(modeltype, equation) %>%
  mutate(equation = paste0("$`", equation, "`$"))

knitr::kable(eq, escape = F)
modeltype equation
linear \(`height = \alpha + \beta_{1}(year\ -\ epoch) + \beta_{2}(cos(2\ *\ pi\ *\ (year\ -\ epoch)/(18.613))) + \beta_{3}(sin(2\ *\ pi\ *\ (year\ -\ epoch)/(18.613))) + \epsilon`\)
broken_linear \(`height = \alpha + \beta_{1}(year\ -\ epoch) + \beta_{2}(from1993) + \beta_{3}(cos(2\ *\ pi\ *\ (year\ -\ epoch)/(18.613))) + \beta_{4}(sin(2\ *\ pi\ *\ (year\ -\ epoch)/(18.613))) + \epsilon`\)
broken_squared \(`height = \alpha + \beta_{1}(year\ -\ epoch) + \beta_{2}(from1960\_square) + \beta_{3}(cos(2\ *\ pi\ *\ (year\ -\ epoch)/(18.613))) + \beta_{4}(sin(2\ *\ pi\ *\ (year\ -\ epoch)/(18.613))) + \epsilon`\)

Autocorrelation

Autocorrelation with previous year(s) can sometimes explain part of the otherwise unexplained variance. Especially when a trend is detected in the data, autocorrelation with relatively short lags (1 or few years) often occurs. In case of autocorrelation, we recalculate standard errors of the estimated parameters accordingly.

plot_ACF(models)
Autocorrelation plot for selected stations and models.

Autocorrelation plot for selected stations and models.

There appears to be a consistent autocorrelation for all stations and models with a ‘lag’ of one year. The autocorrelation does not influence the value of the calculated trend parameters, but needs to be taken into account when calculating standard errors. The Newey West autocorrelatie term is used to correctly calculate the standard errors.

At station ‘Vlissingen’ there is autocorrelation with a ‘lag’ of 10 years. This could indicate an effect of the 8.8 year ‘lunar perigee cycle’. Because this only occurs at Vlissingen, this tidal component is not further accounted for in the analysis.

There is no apparent autocorrelation with a ‘lag’ of 18.6 years, the ‘nodal tide’ cycle. This is because the nodal tide is already incorporated in the three models.

require(sandwich)

models <- addHACterms(models)

Heteroskedasticity

Residuals distribution

The distribution of residuals resembles a normal distribution for most stations and model variants. Station Harlingen is an example where the distribution is out of centre when using the linear model. The width of the residuals distribution is narrower when measurements are corrected for surge prior to application of the models.

plotResidualDistribution(models)

Variation of residuals over time

Distribution of residuals should not reveal a clear deviation from a horizontal line.

models %>%
  unnest(c(data, augment), names_sep = "_") %>%
  ggplot(aes(data_year, augment_.resid)) +
  geom_point(alpha = 0.4) +
  facet_grid(station ~ modeltype)

Sea level rise

lookup <- c(
  Constant = "(Intercept)",
  Trend = "I(year - epoch)",
  u_nodal = "I(cos(2 * pi * (year - epoch)/(18.613)))",
  v_nodal = "I(sin(2 * pi * (year - epoch)/(18.613)))",
  `+ trend 1993` = "from1993",
  `+ square_trend 1960` = "from1960_square",
  AR_term = "previousYearHeight"
)

all_predictions <- makePredictionTable(models, lookup)
ggplot(
  all_predictions,
  aes(x = data_year)
) +
  geom_point(aes(y = data_height), alpha = 0.15) +
  geom_line(aes(y = prediction_recalc)) +
  facet_grid(station ~ modeltype)
  p <- plot_station( 
    predictions_all = all_predictions,
    stationi = unique(all_predictions$station),
    correctionVariant = "GTSM", 
    modelVariant = unique(all_predictions$modeltype), 
    printNumbers = F, 
    startyear = 1890
  ) +
  facet_grid(station ~ modeltype) +
  theme(
    # legend.direction = "horizontal",
    # legend.box = "horizontal",
    legend.position = "bottom", #c(0.975, 0.025),
    # legend.justification = c(1, 0),
    legend.title = element_blank()
  ) +
  theme(strip.text.y = element_text(angle = 90)) 

  
  # ggplotly(p) %>% layout(legend = list(x = 0.05, y = 0.95))
  p

Parameters

## gebruik DT in plaats van kableextra
# library(DT)

lookup.df <- data.frame(long_term = unname(lookup),
                        short_term = names(lookup))

parametertable <- models %>%
  select(station, modeltype, tidy) %>% 
  unnest(tidy) %>%
  left_join(models %>%
              select(station, modeltype, tidy.HAC) %>%
              unnest(tidy.HAC),
            by = c(
              station = "station",
              modeltype = "modeltype",
              term = "term.HAC"
            )
  ) %>%
  mutate(across(where(is.numeric), round, 3)) %>%
  left_join(lookup.df, by = c(term = "long_term")) %>%
  select(-term, term = short_term) %>%
  relocate(term, .after = modeltype)

# parametertable %>%
#   DT::datatable(
#     options = list(
#       "digits" = 3
#     )
#   )

  kableExtra::kable(parametertable,
    caption = "Coefficients for all models and stations.",digits = 2
    ) %>%
  kableExtra::scroll_box(height = "500px")
Coefficients for all models and stations.
station modeltype term estimate std.error statistic p.value st.err.HAC
Vlissingen linear Constant -60.77 2.66 -22.84 0.00 4.02
Vlissingen linear Trend 2.25 0.10 22.53 0.00 0.14
Vlissingen linear u_nodal 2.56 3.28 0.78 0.44 4.52
Vlissingen linear v_nodal -11.34 3.14 -3.61 0.00 3.46
Vlissingen broken_linear Constant -63.15 2.31 -27.36 0.00 3.40
Vlissingen broken_linear Trend 1.58 0.15 10.57 0.00 0.22
Vlissingen broken_linear
  • trend 1993
2.02 0.37 5.42 0.00 0.41
Vlissingen broken_linear u_nodal 2.96 2.80 1.06 0.29 3.53
Vlissingen broken_linear v_nodal -8.68 2.72 -3.19 0.00 2.93
Vlissingen broken_squared Constant -69.67 2.90 -24.04 0.00 3.20
Vlissingen broken_squared Trend 1.12 0.24 4.71 0.00 0.38
Vlissingen broken_squared
  • square_trend 1960
0.02 0.00 5.07 0.00 0.01
Vlissingen broken_squared u_nodal 2.59 2.85 0.91 0.36 3.60
Vlissingen broken_squared v_nodal -8.76 2.77 -3.16 0.00 2.91
Hoek van Holland linear Constant -6.65 2.58 -2.58 0.01 3.64
Hoek van Holland linear Trend 2.86 0.10 29.52 0.00 0.11
Hoek van Holland linear u_nodal 3.13 3.18 0.98 0.33 3.90
Hoek van Holland linear v_nodal -10.59 3.05 -3.47 0.00 3.42
Hoek van Holland broken_linear Constant -6.82 2.64 -2.58 0.01 3.67
Hoek van Holland broken_linear Trend 2.81 0.17 16.40 0.00 0.19
Hoek van Holland broken_linear
  • trend 1993
0.14 0.43 0.34 0.73 0.41
Hoek van Holland broken_linear u_nodal 3.16 3.20 0.98 0.33 3.95
Hoek van Holland broken_linear v_nodal -10.39 3.12 -3.33 0.00 3.62
Hoek van Holland broken_squared Constant -8.35 3.25 -2.57 0.01 4.37
Hoek van Holland broken_squared Trend 2.64 0.27 9.90 0.00 0.29
Hoek van Holland broken_squared
  • square_trend 1960
0.00 0.00 0.86 0.39 0.00
Hoek van Holland broken_squared u_nodal 3.13 3.19 0.98 0.33 3.92
Hoek van Holland broken_squared v_nodal -10.09 3.11 -3.25 0.00 3.68
Den Helder linear Constant -65.43 2.69 -24.36 0.00 3.86
Den Helder linear Trend 1.97 0.10 19.57 0.00 0.14
Den Helder linear u_nodal 0.53 3.31 0.16 0.87 3.17
Den Helder linear v_nodal -16.69 3.17 -5.26 0.00 4.07
Den Helder broken_linear Constant -66.34 2.70 -24.60 0.00 3.66
Den Helder broken_linear Trend 1.72 0.17 9.82 0.00 0.24
Den Helder broken_linear
  • trend 1993
0.78 0.43 1.78 0.08 0.46
Den Helder broken_linear u_nodal 0.69 3.27 0.21 0.83 3.02
Den Helder broken_linear v_nodal -15.67 3.18 -4.93 0.00 3.87
Den Helder broken_squared Constant -69.54 3.30 -21.06 0.00 3.59
Den Helder broken_squared Trend 1.45 0.27 5.34 0.00 0.40
Den Helder broken_squared
  • square_trend 1960
0.01 0.00 2.06 0.04 0.01
Den Helder broken_squared u_nodal 0.55 3.24 0.17 0.87 2.94
Den Helder broken_squared v_nodal -15.50 3.16 -4.91 0.00 3.77
Delfzijl linear Constant 11.57 3.31 3.50 0.00 3.99
Delfzijl linear Trend 2.42 0.12 19.50 0.00 0.15
Delfzijl linear u_nodal -6.13 4.08 -1.50 0.14 4.59
Delfzijl linear v_nodal -18.40 3.90 -4.71 0.00 4.10
Delfzijl broken_linear Constant 9.54 3.15 3.02 0.00 3.79
Delfzijl broken_linear Trend 1.85 0.20 9.05 0.00 0.24
Delfzijl broken_linear
  • trend 1993
1.72 0.51 3.38 0.00 0.48
Delfzijl broken_linear u_nodal -5.79 3.82 -1.51 0.13 4.13
Delfzijl broken_linear v_nodal -16.13 3.72 -4.34 0.00 3.65
Delfzijl broken_squared Constant 3.85 3.91 0.98 0.33 4.31
Delfzijl broken_squared Trend 1.44 0.32 4.48 0.00 0.40
Delfzijl broken_squared
  • square_trend 1960
0.02 0.01 3.27 0.00 0.01
Delfzijl broken_squared u_nodal -6.10 3.84 -1.59 0.12 4.25
Delfzijl broken_squared v_nodal -16.16 3.74 -4.32 0.00 3.57
Harlingen linear Constant -8.73 3.27 -2.67 0.01 3.76
Harlingen linear Trend 1.82 0.12 14.85 0.00 0.15
Harlingen linear u_nodal 0.71 4.03 0.18 0.86 4.85
Harlingen linear v_nodal -13.58 3.86 -3.52 0.00 4.47
Harlingen broken_linear Constant -11.54 2.88 -4.01 0.00 3.71
Harlingen broken_linear Trend 1.03 0.19 5.54 0.00 0.22
Harlingen broken_linear
  • trend 1993
2.38 0.46 5.14 0.00 0.48
Harlingen broken_linear u_nodal 1.18 3.48 0.34 0.74 4.04
Harlingen broken_linear v_nodal -10.44 3.39 -3.08 0.00 3.49
Harlingen broken_squared Constant -17.75 3.75 -4.74 0.00 4.86
Harlingen broken_squared Trend 0.68 0.31 2.20 0.03 0.38
Harlingen broken_squared
  • square_trend 1960
0.02 0.01 3.98 0.00 0.01
Harlingen broken_squared u_nodal 0.74 3.68 0.20 0.84 4.39
Harlingen broken_squared v_nodal -10.96 3.58 -3.06 0.00 3.68
IJmuiden linear Constant -41.31 2.79 -14.81 0.00 4.35
IJmuiden linear Trend 2.06 0.10 19.67 0.00 0.15
IJmuiden linear u_nodal -0.75 3.44 -0.22 0.83 3.77
IJmuiden linear v_nodal -11.79 3.29 -3.58 0.00 4.20
IJmuiden broken_linear Constant -42.10 2.82 -14.94 0.00 4.04
IJmuiden broken_linear Trend 1.84 0.18 10.05 0.00 0.27
IJmuiden broken_linear
  • trend 1993
0.67 0.45 1.48 0.14 0.50
IJmuiden broken_linear u_nodal -0.62 3.42 -0.18 0.86 3.58
IJmuiden broken_linear v_nodal -10.90 3.32 -3.28 0.00 4.06
IJmuiden broken_squared Constant -44.67 3.47 -12.88 0.00 3.73
IJmuiden broken_squared Trend 1.63 0.28 5.73 0.00 0.47
IJmuiden broken_squared
  • square_trend 1960
0.01 0.00 1.60 0.11 0.01
IJmuiden broken_squared u_nodal -0.74 3.40 -0.22 0.83 3.55
IJmuiden broken_squared v_nodal -10.82 3.32 -3.26 0.00 3.89
Netherlands linear Constant -28.55 2.28 -12.55 0.00 2.99
Netherlands linear Trend 2.23 0.09 26.12 0.00 0.11
Netherlands linear u_nodal 0.01 2.81 0.00 1.00 3.12
Netherlands linear v_nodal -13.73 2.69 -5.11 0.00 3.05
Netherlands broken_linear Constant -30.07 2.14 -14.05 0.00 2.74
Netherlands broken_linear Trend 1.80 0.14 13.01 0.00 0.18
Netherlands broken_linear
  • trend 1993
1.28 0.34 3.72 0.00 0.36
Netherlands broken_linear u_nodal 0.26 2.59 0.10 0.92 2.67
Netherlands broken_linear v_nodal -12.04 2.52 -4.77 0.00 2.71
Netherlands broken_squared Constant -34.35 2.65 -12.96 0.00 2.92
Netherlands broken_squared Trend 1.50 0.22 6.86 0.00 0.31
Netherlands broken_squared
  • square_trend 1960
0.01 0.00 3.62 0.00 0.00
Netherlands broken_squared u_nodal 0.03 2.60 0.01 0.99 2.68
Netherlands broken_squared v_nodal -12.05 2.54 -4.75 0.00 2.67
Netherlands (without Delfzijl) linear Constant -36.58 2.21 -16.53 0.00 3.02
Netherlands (without Delfzijl) linear Trend 2.19 0.08 26.40 0.00 0.11
Netherlands (without Delfzijl) linear u_nodal 1.24 2.73 0.45 0.65 3.09
Netherlands (without Delfzijl) linear v_nodal -12.80 2.61 -4.90 0.00 3.01
Netherlands (without Delfzijl) broken_linear Constant -37.99 2.10 -18.11 0.00 2.78
Netherlands (without Delfzijl) broken_linear Trend 1.79 0.14 13.20 0.00 0.18
Netherlands (without Delfzijl) broken_linear
  • trend 1993
1.20 0.34 3.54 0.00 0.37
Netherlands (without Delfzijl) broken_linear u_nodal 1.47 2.54 0.58 0.56 2.68
Netherlands (without Delfzijl) broken_linear v_nodal -11.22 2.47 -4.54 0.00 2.71
Netherlands (without Delfzijl) broken_squared Constant -41.99 2.60 -16.17 0.00 2.93
Netherlands (without Delfzijl) broken_squared Trend 1.51 0.21 7.05 0.00 0.31
Netherlands (without Delfzijl) broken_squared
  • square_trend 1960
0.01 0.00 3.45 0.00 0.00
Netherlands (without Delfzijl) broken_squared u_nodal 1.25 2.55 0.49 0.62 2.68
Netherlands (without Delfzijl) broken_squared v_nodal -11.23 2.48 -4.52 0.00 2.69

Which model is the preferred model?

Is there a significant acceleration?

acc_broken_linear <- parametertable %>%
  filter(modeltype == "broken_linear") %>%
  filter(term == "+ trend 1993") %>%
  select(station, p.value )
knitr::kable(acc_broken_linear, caption = "p-values for the acceleration term in the broken linear model for all stations. ")
p-values for the acceleration term in the broken linear model for all stations.
station p.value
Vlissingen 0.000
Hoek van Holland 0.734
Den Helder 0.078
Delfzijl 0.001
Harlingen 0.000
IJmuiden 0.144
Netherlands 0.000
Netherlands (without Delfzijl) 0.001

For the broken linear model, there is a significant acceleration starting in the year 1993 when fitting the average sea level combined for all stations without Delfzijl. For individual stations, the acceleration is not significant for the stations Vlissingen, Hoek van Holland and IJmuiden.

acc_broken_linear <- parametertable %>%
  filter(modeltype == "broken_squared") %>%
  filter(term == "+ square_trend 1960") %>%
  select(station, p.value )
knitr::kable(acc_broken_linear, caption = "p-vallues for the acceleration term in the broken squared model for all stations. ")
p-vallues for the acceleration term in the broken squared model for all stations.
station p.value
Vlissingen 0.000
Hoek van Holland 0.390
Den Helder 0.043
Delfzijl 0.002
Harlingen 0.000
IJmuiden 0.114
Netherlands 0.001
Netherlands (without Delfzijl) 0.001

For the broken squared model, there is a significant acceleration starting in the year 1960 when fitting the average sea level combined for all stations without Delfzijl. For individual stations, the acceleration is not significant for the stations Vlissingen, Hoek van Holland and IJmuiden.

Which model has the lowest Akaike Information Criterion (AIC)?

Of the two models with an acceleration term, the model with lowest AIC is the preferred model.

models %>%
  # filter(station == "Netherlands (without Delfzijl)") |>
  mutate(station = as.character(station)) %>%
  select(station, modeltype, AIC) %>%
  # unite(`modeltype x station`, modeltype, station) %>%
  arrange(-AIC) %>% 
  mutate(modeltype = factor(modeltype, levels=config$runparameters$modeltype)) %>%
  mutate(station = factor(station, levels=config$runparameters$station)) %>%
  ggplot(aes(x = modeltype, y = AIC)) +
  geom_point(size = 3, shape = "|") +
  coord_flip() +
  facet_wrap("station")

For the combined stations Netherlands and Netherlands (without Delfzijl), the non linear model has the lowest AIC, and is therefore the first candidate for the preferred model. In the next section, it is tested whether the non-linear model explains the observed variation significantly better than the simplest model, the linear model.

For stations Den Helder and Hoek van Holland, the broken squared model is the model with lowest AIC.

At station IJmuiden, all three models have similar AIC.

Considering all stations, the broken linear and the broken squared model describe the observed variation approximately equally well.

Is the preferred model significantly better than the linear model?

The broken linear model is chosen as the preferred candidate because it gave a better explanation of the observation, corrected for the degrees of freedom of the model (AIC criterion). Here, it is tested whether the broken linear model is significantly better model that the most simple model, the broken linear model.

bl <- models %>% 
  filter(
    station == "Netherlands (without Delfzijl)",
    modeltype == "broken_linear"
  ) %>%
  select(model) %>%
  unlist(recursive = F) %>%
  unname()

l <- models %>% 
  filter(
    station == "Netherlands (without Delfzijl)",
    modeltype == "linear"
  ) %>%
  select(model) %>%
  unlist(recursive = F) %>%
  unname()

t <- anova(l[[1]], bl[[1]])

# broom::tidy(t) %>% 
#   select(term, rss, p.value)

p_value <- t$`Pr(>F)`[2]

if(p_value<0.01) {
  alternativemodelaccepted = TRUE
  alternativemodelrefused = FALSE
}

The Anova table for model comparison is shown below.

digits = 3
output <- t

rm.cols <- NULL
for (i in 1:(dim(output)[2])) {
  if (all(is.na(output[,i]))) { rm.cols <- c(rm.cols,i) }
}
if (!is.null(rm.cols)) { output <- output[,-rm.cols] }
# Reformat column names
names(output) <- sub("Rsq","R^2^",names(output))
names(output) <- sub("Pr\\(>F\\)","p",names(output))
names(output) <- sub("^P$","p",names(output))
# Rounding
output <- apply(output, 2, signif, digits = digits)
# Generate the kable
options(knitr.kable.NA = '')
knitr::kable(output)
Res.Df RSS Df Sum of Sq F p
75 20900
74 17900 1 3030 12.5 0.000694

The acceleration model (broken linear) has one more degree of freedom as the linear model. The broken linear model is significantly better than the linear model (p < 0.001).

Conclusions

Based on the above analysis, the following conclusions are drawn:

Based on variance analysis the broken linear model is significantly better than the linear model. the broken linear model is accepted as the preferred model.